{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports and constants"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import yaml\n",
    "\n",
    "SEEDS = [18, 45, 25]\n",
    "\n",
    "NATURAL = [\n",
    "    \"cifar100\",\n",
    "    \"caltech101\",\n",
    "    \"dtd\",\n",
    "    \"flowers102\",\n",
    "    \"pet\",\n",
    "    \"svhn\",\n",
    "    \"sun397\",\n",
    "]\n",
    "SPECIALIZED = [\n",
    "    \"camelyon\",\n",
    "    \"eurosat\",\n",
    "    \"resisc45\",\n",
    "    \"retinopathy\",\n",
    "]\n",
    "STRUCTURED = [\n",
    "    \"clevr_count\",\n",
    "    \"clevr_distance\",\n",
    "    \"dmlab\",\n",
    "    \"kitti\",\n",
    "    \"dsprites_position\",\n",
    "    \"dsprites_orientation\",\n",
    "    \"smallnorb_azimuth\",\n",
    "    \"smallnorb_elevation\",\n",
    "]\n",
    "\n",
    "VTAB_DATASETS = NATURAL + SPECIALIZED + STRUCTURED\n",
    "\n",
    "FGVC_DATASETS = [\n",
    "    \"cub\",\n",
    "    \"nabirds\",\n",
    "    \"oxfordflower\",\n",
    "    \"stanforddogs\",\n",
    "    \"stanfordcars\",\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_vtab_mean_std(results_df):\n",
    "    \"\"\"Print VTAB group averages and LaTeX-style rows of means and stds.\n",
    "\n",
    "    Reads the \"mean\" and \"std\" columns of `results_df` for every dataset in\n",
    "    NATURAL, SPECIALIZED and STRUCTURED. A group's average is the plain mean\n",
    "    of its dataset means; its std is sqrt(sum(std_i**2)) / n. The global\n",
    "    figures combine the three group values in the same way. All printed\n",
    "    numbers are multiplied by 100.\n",
    "    \"\"\"\n",
    "    mean_col = results_df[\"mean\"]\n",
    "    std_col = results_df[\"std\"]\n",
    "\n",
    "    def summarize(names):\n",
    "        # Per-dataset means/stds of one group plus their aggregated values.\n",
    "        means = [mean_col.loc[name] for name in names]\n",
    "        stds = [std_col.loc[name] for name in names]\n",
    "        group_avg = sum(means) / len(names)\n",
    "        group_std = sum([s**2 for s in stds]) ** 0.5 / len(names)\n",
    "        return means, stds, group_avg, group_std\n",
    "\n",
    "    nat_means, nat_stds, natural_avg, natural_std = summarize(NATURAL)\n",
    "    spec_means, spec_stds, specialized_avg, specialized_std = summarize(SPECIALIZED)\n",
    "    struct_means, struct_stds, structured_avg, structured_std = summarize(STRUCTURED)\n",
    "\n",
    "    overall_avg = (natural_avg + specialized_avg + structured_avg) / 3\n",
    "    overall_std = (natural_std**2 + specialized_std**2 + structured_std**2) ** 0.5 / 3\n",
    "\n",
    "    print(f\"Natural Avg: {natural_avg * 100:.1f} +- {natural_std * 100:.1f}\")\n",
    "    print(f\"Specialized Avg: {specialized_avg * 100:.1f} +- {specialized_std * 100:.1f}\")\n",
    "    print(f\"Structured Avg: {structured_avg * 100:.1f} +- {structured_std * 100:.1f}\")\n",
    "    print(f\"Global Avg: {overall_avg * 100:.1f} +- {overall_std * 100:.1f}\")\n",
    "\n",
    "    # Row layout: the datasets of a group, then the group value; global value last.\n",
    "    results_list = [\n",
    "        *nat_means, natural_avg,\n",
    "        *spec_means, specialized_avg,\n",
    "        *struct_means, structured_avg,\n",
    "        overall_avg,\n",
    "    ]\n",
    "    stds_list = [\n",
    "        *nat_stds, natural_std,\n",
    "        *spec_stds, specialized_std,\n",
    "        *struct_stds, structured_std,\n",
    "        overall_std,\n",
    "    ]\n",
    "\n",
    "    # Each row ends with the LaTeX row terminator (two backslashes).\n",
    "    print(\" & \".join(f\"{100*x:.1f}\" for x in results_list), \"\\\\\\\\\")\n",
    "    print(\" & \".join(f\"{100*x:.2f}\" for x in stds_list), \"\\\\\\\\\")\n",
    "\n",
    "\n",
    "def convert_df(eval_df, datasets):\n",
    "    \"\"\"Pivot a long-format evaluation log into a per-dataset results table.\n",
    "\n",
    "    `eval_df` must provide the columns \"dataset\", \"seed\" and \"accuracy\" with\n",
    "    exactly one row per (dataset, seed) pair for every seed in SEEDS; the\n",
    "    `.item()` lookup raises otherwise. The returned frame is indexed by\n",
    "    `datasets` and has one column per seed plus \"mean\" and \"std\" (np.std\n",
    "    with its default settings).\n",
    "    \"\"\"\n",
    "    seed_columns = [f\"seed={s}\" for s in SEEDS]\n",
    "    results_df = pd.DataFrame(index=datasets, columns=seed_columns + [\"mean\", \"std\"])\n",
    "\n",
    "    for dataset in datasets:\n",
    "        # All log rows that belong to the current dataset.\n",
    "        dataset_rows = eval_df[eval_df.dataset == dataset]\n",
    "        row = {\n",
    "            f\"seed={seed}\": dataset_rows.query(f\"seed == {seed}\")[\"accuracy\"].item()\n",
    "            for seed in SEEDS\n",
    "        }\n",
    "        # Statistics are taken over the per-seed accuracies only.\n",
    "        seed_accs = np.array(list(row.values()))\n",
    "        row[\"mean\"] = np.mean(seed_accs)\n",
    "        row[\"std\"] = np.std(seed_accs)\n",
    "        results_df.loc[dataset] = pd.Series(row)\n",
    "\n",
    "    return results_df\n",
    "\n",
    "\n",
    "def build_df_from_dir(basedir, datasets):\n",
    "    \"\"\"Collect per-seed validation accuracies and parameter counts from training logs.\n",
    "\n",
    "    For every dataset, \"trainable_params\" is read from\n",
    "    <basedir>/<dataset>/seed=<SEEDS[0]>/logs/hparams.yaml and, for each seed in\n",
    "    SEEDS, the last non-NaN \"val/accuracy\" entry is read from\n",
    "    <basedir>/<dataset>/seed=<seed>/logs/metrics.csv.\n",
    "\n",
    "    Args:\n",
    "        basedir: Directory containing one sub-directory per dataset.\n",
    "        datasets: Dataset names to collect; used as the index of the result.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (results_df, total_params). results_df has one column per seed\n",
    "        plus \"mean\" and \"std\" (np.std with its default settings). A dataset\n",
    "        for which any seed has no usable accuracy (missing metrics.csv, no\n",
    "        \"val/accuracy\" column, or only NaN entries) keeps an all-NaN row.\n",
    "        total_params is the sum of \"trainable_params\" over all datasets.\n",
    "\n",
    "    Raises:\n",
    "        FileNotFoundError: If hparams.yaml of the first seed does not exist.\n",
    "    \"\"\"\n",
    "    seed_columns = [f\"seed={s}\" for s in SEEDS]\n",
    "    results_df = pd.DataFrame(index=datasets, columns=seed_columns + [\"mean\", \"std\"])\n",
    "    total_params = 0\n",
    "\n",
    "    for dataset in datasets:\n",
    "        accs = {column: np.nan for column in seed_columns}\n",
    "\n",
    "        # The parameter count is taken from the first seed's run only.\n",
    "        hparams_path = os.path.join(basedir, dataset, f\"seed={SEEDS[0]}/logs/hparams.yaml\")\n",
    "        with open(hparams_path, \"r\") as f:\n",
    "            hparams = yaml.load(f, Loader=yaml.loader.SafeLoader)\n",
    "        total_params += hparams[\"trainable_params\"]\n",
    "\n",
    "        for seed in SEEDS:\n",
    "            metrics_path = os.path.join(basedir, dataset, f\"seed={seed}/logs/metrics.csv\")\n",
    "            if not os.path.exists(metrics_path):\n",
    "                continue\n",
    "            df = pd.read_csv(metrics_path, sep=\",\")\n",
    "            if \"val/accuracy\" not in df.columns:\n",
    "                continue\n",
    "            idx = df[\"val/accuracy\"].last_valid_index()\n",
    "            if idx is None:\n",
    "                # Column exists but holds no valid value; treat the seed as missing.\n",
    "                continue\n",
    "            accs[f\"seed={seed}\"] = df.at[idx, \"val/accuracy\"]\n",
    "\n",
    "        # only update complete runs, i.e., all SEEDS present\n",
    "        accs_array = np.array(list(accs.values()))\n",
    "        if not np.isnan(accs_array).any():\n",
    "            row = dict(accs)\n",
    "            row[\"mean\"] = np.mean(accs_array)\n",
    "            row[\"std\"] = np.std(accs_array)\n",
    "            results_df.loc[dataset] = pd.Series(row)\n",
    "\n",
    "    return results_df, total_params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# VTAB"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get results from train.py logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASEDIR = \"output/vtab/adapter_plus_dim8\"\n",
    "\n",
    "results_df, total_params = build_df_from_dir(BASEDIR, VTAB_DATASETS)\n",
    "\n",
    "print(results_df)\n",
    "print_vtab_mean_std(results_df)\n",
    "# Average over the datasets that were actually passed in, reported in millions.\n",
    "print(f\"Average parameters: {total_params / len(VTAB_DATASETS) / 1e6:.2f}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get results from eval.py log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASEDIR = \"output/vtab/adapter_plus_dim8\"\n",
    "\n",
    "# eval.log is read as a tab-separated table; column names are supplied explicitly.\n",
    "eval_log_path = os.path.join(BASEDIR, \"eval.log\")\n",
    "eval_df = pd.read_csv(eval_log_path, sep=\"\\t\", names=[\"dataset\", \"seed\", \"accuracy\"])\n",
    "results_df = convert_df(eval_df, VTAB_DATASETS)\n",
    "\n",
    "print(results_df)\n",
    "print_vtab_mean_std(results_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FGVC"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get results from train.py logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASEDIR = \"output/fgvc/adapter_plus_dim1-32\"\n",
    "\n",
    "results_df, total_params = build_df_from_dir(BASEDIR, FGVC_DATASETS)\n",
    "print(results_df)\n",
    "\n",
    "# Divide by the number of datasets collected instead of a hard-coded count.\n",
    "num_datasets = len(FGVC_DATASETS)\n",
    "avg = sum(results_df[\"mean\"]) / num_datasets\n",
    "for d in FGVC_DATASETS:\n",
    "    print(f\"{d}: {results_df['mean'].loc[d] * 100:.1f}\")\n",
    "print(f\"Average: {avg * 100:.1f}\")\n",
    "print(f\"Average parameters: {total_params / num_datasets / 1e6:.2f}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get results from eval.py log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BASEDIR = \"./output/fgvc/adapter_plus_dim1-32\"\n",
    "\n",
    "# eval.log is read as a tab-separated table; column names are supplied explicitly.\n",
    "eval_df = pd.read_csv(os.path.join(BASEDIR, \"eval.log\"), sep=\"\\t\", names=[\"dataset\", \"seed\", \"accuracy\"])\n",
    "results_df = convert_df(eval_df, FGVC_DATASETS)\n",
    "print(results_df)\n",
    "\n",
    "# Divide by the number of datasets collected instead of a hard-coded count.\n",
    "avg = sum(results_df[\"mean\"]) / len(FGVC_DATASETS)\n",
    "for d in FGVC_DATASETS:\n",
    "    print(f\"{d}: {results_df['mean'].loc[d] * 100:.1f}\")\n",
    "print(f\"Average: {avg * 100:.1f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "node",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
